source('~/Github/diverseScripts/R/scripts/largeDivergingPalettes.R')
library(acidAdaptedRNAseq)
library(GO.db)
library(GOSim)
library(igraph)
library(tidyverse)
library(ggraph)
library(multipanelfigure)
library(grid)
library(gtable)
library(magrittr)
library(printr)
#load GO result and subset signignificant terms
data(topGOresult, package = "acidAdaptedRNAseq")
sig <- topGOresult[topGOresult$classicFisher < 0.05, ]

Download the GO graph for the significant terms and run the spin-glass community detection algorithm. Show the number of detected communities and their size.

Note to self: Check if what getGOgraph is downloading is equal to the ancestors via GO.db.

#get GO graph
g <- downloadGOgraph(sig$GO.ID)

#run community analysis
tmp <- collapseGraph(g)
cg <- tmp[[1]]
comSummary <- tmp[[2]] %>%
    left_join(select(sig, GO.ID, classicFisher), by=c("GOID" = "GO.ID"))

Calculate and show the number of genes included in each community.

#calculate genes per term
uCommunityNames <- unique(comSummary$communityName)
genesPerCom <- lapply(1:length(uCommunityNames), function(x) {
    bool <- comSummary$communityName == uCommunityNames[x]
    idsToGet <- comSummary[bool, ]$GOID
    genesForIdsString <- sig[sig$GO.ID %in% idsToGet, "ID"]
    genesForIdsList <- strsplit(genesForIdsString, ", ")
    length(unlist(genesForIdsList))
})
names(genesPerCom) <- uCommunityNames

namedListToTibble(genesPerCom) %>%
    setNames(c("communityName", "nrCommunityGenes")) %>%
    inner_join(distinct(select(comSummary, communityName, communityTerm))) %>%
    select(communityTerm, nrCommunityGenes, -communityName)
## Joining, by = "communityName"
communityTerm nrCommunityGenes
protein phosphorylation 2571
single-multicellular organism process 3389
protein heterotetramerization 57
Wnt signaling pathway 1002
DNA duplex unwinding 47
store-operated calcium entry 11
translation 281
organophosphate metabolic process 32
RNA stabilization 35
programmed cell death 2556
secretion 3011
cellular metabolic process 9560
RNA metabolic process 1422
cell adhesion 1186
cellular hormone metabolic process 432
single-organism developmental process 16231
B cell activation involved in immune response 1937
multicellular organism metabolic process 144
detoxification 4187
cell cycle 854
single-organism cellular process 3632
ameboidal-type cell migration 673
single organism signaling 21800
cell proliferation 1539
‘de novo’ protein folding 52
multicellular organismal signaling 590
antigen processing and presentation of exogenous peptide antigen 291
antigen receptor-mediated signaling pathway 149
membrane lipid metabolic process 105
somatic recombination of immunoglobulin genes involved in immune response 623
adaptive immune response based on somatic recombination of immune receptors built from immunoglobulin superfamily domains 1686
coagulation 24
phosphorus metabolic process 1838
regulation of anatomical structure size 467
cell differentiation 2869
flagellated sperm motility 2096

Show the similarity between communities.

cgSim <- similarity(cg, mode = "all", method = "jaccard")
colnames(cgSim) <- unique(
    comSummary[match(V(cg)$name, comSummary$communityID), ]$communityTerm
)
rownames(cgSim) <- unique(
    comSummary[match(V(cg)$name, comSummary$communityID), ]$communityTerm
)
heatmap(1 - cgSim, margins = c(20,20))

Set attributes in the collapsed graph.

cg <- addAttributes1(cg, comSummary)

Add back all nodes from the original graph with the exception of the community nodes. Add an edge from each of the added nodes to the corresponding community node. Plot the final result.

#add vertices and edges from uncollapsed graph
toAdd <- comSummary %>%
    filter(!GOID %in% communityID) %>%
    filter(classicFisher < 0.05) %>%
    select(GOID, communityID) %>%
    setNames(c("from", "to"))

cg <- addBackVertexAndEdges(cg, toAdd)

#add attributes
cg <- addAttributes2(cg, comSummary)

#plot
plotCollapsedGraph(cg, 10, 5)

collapsed <- plotCollapsedGraph(cg)
mylegend <- g_legend(collapsed)
collapsed <- collapsed + theme(legend.position="none")